library(tidyverse)
## ── Attaching packages ─────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(leaflet)
library(ggplot2)
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Read in wine data.
# Extract the first four-digit vintage year (19xx or 20xx) from each string.
#
# Args:
#   string: character vector (e.g. wine titles); coerced with as.character().
# Returns:
#   A numeric vector the same length as `string`, with NA where no year is
#   found or the element itself is NA.
year_extract <- function(string) {
  string <- as.character(string)
  # Require a 19/20 century prefix. The previous class `[9|0]` also accepted
  # a literal "|" and let 10xx/29xx through, so a title such as
  # "1000 Stories 2014" produced 1000 instead of 2014.
  hits <- regexpr("(19|20)[0-9]{2}", string)
  found <- !is.na(hits) & hits > 0L
  years <- rep(NA_real_, length(string))
  # regmatches() returns only the matched substrings, in input order, so it
  # lines up with the `found` mask.
  years[found] <- as.numeric(regmatches(string, hits))
  years
}
# Read the raw review CSV, clean the column names with janitor, drop four
# columns (including the unnamed index column, which becomes `x1`), and
# derive a numeric `year` from each wine title.
wine_tidy_df <-
  "./wine_data/winemag-data-130k-v2.csv" %>%
  read_csv() %>%
  janitor::clean_names() %>%
  select(-c(region_2, taster_twitter_handle, taster_name, x1)) %>%
  mutate(year = year_extract(title))
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## country = col_character(),
## description = col_character(),
## designation = col_character(),
## points = col_double(),
## price = col_double(),
## province = col_character(),
## region_1 = col_character(),
## region_2 = col_character(),
## taster_name = col_character(),
## taster_twitter_handle = col_character(),
## title = col_character(),
## variety = col_character(),
## winery = col_character()
## )
# Per-state summary of US wines: number of reviews, mean rating (rounded to
# a whole number) and mean price (rounded to 2 decimals), largest first.
wine_us <- wine_tidy_df %>%
  filter(country == "US") %>%
  rename(state = province) %>%
  # Some US rows carry the placeholder province "America"; treat as missing.
  mutate(state = na_if(state, "America")) %>%
  select(state, points, price) %>%
  # Keep only rows that have a state, a rating and a price.
  drop_na() %>%
  group_by(state) %>%
  summarise(
    total = n(),
    avg_rating = round(mean(points), 0),
    avg_price = round(mean(price), 2),
    .groups = "drop"
  ) %>%
  # No trailing view(): an assigned pipeline should not open a data viewer
  # as a side effect each time it runs.
  arrange(desc(total))
## `summarise()` ungrouping output (override with `.groups` argument)
# Count missing values per column for US wines before any rows are dropped.
# "America" is recoded to NA so it is counted as a missing state.
wine_us_missing <- wine_tidy_df %>%
  filter(country == "US") %>%
  select(state = province, points, price) %>%
  mutate(state = na_if(state, "America"))

wine_us_missing %>%
  map(~ sum(is.na(.x)))
## $state
## [1] 95
##
## $points
## [1] 0
##
## $price
## [1] 239
Missing Values:

* State: 95
* Points: 0
* Price: 239

Several wines made in the US did not have a province/state listed; instead they were labeled "America", which was recoded to NA. Many wines also did not have a price listed. Wines missing points, price, or province were excluded from the final mapping dataset.
Rounded rating to nearest whole number and price to 2 decimal places.
# Per-country summary of all wines: number of reviews, mean rating (rounded
# to a whole number) and mean price (rounded to 2 decimals), largest first.
wine_by_country <- wine_tidy_df %>%
  # Recode "US" to the full country name.
  mutate(country = recode(country, US = "United States")) %>%
  select(country, points, price) %>%
  # Keep only rows that have a country, a rating and a price.
  drop_na() %>%
  group_by(country) %>%
  summarise(
    total = n(),
    avg_rating = round(mean(points), 0),
    avg_price = round(mean(price), 2),
    .groups = "drop"
  ) %>%
  # No trailing view(): an assigned pipeline should not open a data viewer
  # as a side effect each time it runs.
  arrange(desc(total))
## `summarise()` ungrouping output (override with `.groups` argument)
# Count missing values per column across all countries before any rows are
# dropped.
wine_country_missing <- wine_tidy_df %>%
  transmute(
    country = recode(country, US = "United States"),
    points,
    price
  )

wine_country_missing %>%
  map(~ sum(is.na(.x)))
## $country
## [1] 63
##
## $points
## [1] 0
##
## $price
## [1] 8996
Missing Values:

* Country: 63
* Points: 0
* Price: 8996

Several wines did not list a country of origin or a price. Wines missing a country or a price were excluded from the final world mapping dataset.
# Fetch US state polygons via tigris (cb = TRUE requests the
# cartographic-boundary file).
# NOTE(review): the variable shares its name with tigris::states(); kept
# because later chunks refer to `states`.
states <- tigris::states(cb = TRUE)
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
# Quick preview map: state outlines on the default tiles, with each state's
# NAME as its popup.
leaflet(states) %>%
  addTiles() %>%
  addPolygons(popup = ~NAME)
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
# Attach the per-state wine summary to the state polygons, matching the
# polygons' NAME column to the summary's `state` column.
states_merged_wine <- geo_join(
  spatial_data = states,
  data_frame = wine_us,
  by_sp = "NAME",
  by_df = "state"
)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Choropleth of wine-review counts per US state.

# Drop states without wine data first so the palette, popups and legend all
# describe the same rows.
states_merged_wine <- subset(states_merged_wine, !is.na(total))

# Fixed colour breaks; the top break grows if a state ever exceeds 40,000 so
# that no polygon falls outside the bins and turns transparent.
mybins <- c(0, 100, 1000, 8000, 10000, max(40000, states_merged_wine$total))
mypal <- colorBin(
  palette = "Purples",
  domain = states_merged_wine$total,
  na.color = "transparent",
  bins = mybins
)

# `total` is a count of review rows (n()), not of distinct wineries, so it is
# labelled accordingly.
popup <- paste0(
  states_merged_wine$NAME, "<br>",
  "Wines Reviewed: ", states_merged_wine$total, "<br>",
  "Avg Rating: ", states_merged_wine$avg_rating, "<br>",
  "Avg Price: ", states_merged_wine$avg_price, "<br>"
)

# NOTE(review): leaflet warns that this layer uses the NAD83 datum rather
# than WGS84; consider reprojecting the polygons before mapping.
states_merged_wine %>%
  leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-98.483330, 38.712046, zoom = 4) %>%
  addPolygons(
    fillColor = ~mypal(total),
    fillOpacity = 1.0,
    weight = 0.4,
    smoothFactor = 0.2,
    popup = ~popup
  ) %>%
  addLegend(
    pal = mypal,
    values = states_merged_wine$total,
    position = "bottomright",
    title = "Wines Reviewed"
  )
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
# Display the per-country summary table.
wine_by_country
## # A tibble: 42 x 4
## country total avg_rating avg_price
## <chr> <int> <dbl> <dbl>
## 1 United States 54265 89 36.6
## 2 France 17776 89 41.1
## 3 Italy 16914 89 39.7
## 4 Spain 6573 87 28.2
## 5 Portugal 4875 88 26.2
## 6 Chile 4416 86 20.8
## 7 Argentina 3756 87 24.5
## 8 Austria 2799 90 30.8
## 9 Australia 2294 89 35.4
## 10 Germany 2120 90 42.3
## # … with 32 more rows
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.5-18, (SVN revision 1082)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.1.1, released 2020/06/22
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/sf/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/proj
## Linking to sp version:1.4-4
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
# Read the simplified world-borders shapefile from the project's data folder.
# The path is built with file.path() rather than pasted by hand, which also
# avoids passing a trailing slash in `dsn`.
world_spdf <- readOGR(
  dsn = file.path(getwd(), "wine_data", "world_shape_file"),
  layer = "TM_WORLD_BORDERS_SIMPL-0.3",
  verbose = FALSE
)
# Quick preview map: country outlines on the default tiles, with each
# country's NAME as its popup.
leaflet(world_spdf) %>%
  addTiles() %>%
  addPolygons(popup = ~NAME)
# Attach the per-country wine summary to the world polygons, matching the
# polygons' NAME column to the summary's `country` column.
# NOTE(review): countries spelled differently in the two sources will not
# match -- worth verifying the unmatched names.
countries_merged_wine <- geo_join(
  spatial_data = world_spdf,
  data_frame = wine_by_country,
  by_sp = "NAME",
  by_df = "country"
)
# Choropleth of wine-review counts per country.

# Drop countries without wine data first so the palette, popups and legend
# all describe the same rows.
countries_merged_wine <- subset(countries_merged_wine, !is.na(total))

# Fixed colour breaks; the top break grows if a country ever exceeds 60,000
# so that no polygon falls outside the bins and turns transparent.
world_bins <- c(
  0, 100, 1000, 10000, 20000, 30000,
  max(60000, countries_merged_wine$total)
)
world_pal <- colorBin(
  palette = "Reds",
  domain = countries_merged_wine$total,
  na.color = "transparent",
  bins = world_bins
)

# `total` is a count of review rows (n()), not of distinct wineries, so it is
# labelled accordingly.
world_popup <- paste0(
  countries_merged_wine$country, "<br>",
  "Wines Reviewed: ", countries_merged_wine$total, "<br>",
  "Avg Rating: ", countries_merged_wine$avg_rating, "<br>",
  "Avg Price: ", countries_merged_wine$avg_price, "<br>"
)

countries_merged_wine %>%
  leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lat = 10, lng = 0, zoom = 2) %>%
  addPolygons(
    fillColor = ~world_pal(total),
    fillOpacity = 1.0,
    weight = 0.4,
    smoothFactor = 0.2,
    popup = ~world_popup
  ) %>%
  addLegend(
    pal = world_pal,
    values = countries_merged_wine$total,
    position = "bottomright",
    title = "Wines Reviewed"
  )
Is legend necessary?
Aesthetics: are there better shape files? The polygon overlay looks a little off.